%matplotlib inline
%config InlineBackend.figure_format='svg'
import pandas as pd
from datetime import datetime, timedelta
import seaborn as sns
sns.set()
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
from scipy.signal import savgol_filter # smooth the data
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use the legacy name only while it
# is still registered and fall back to the renamed style otherwise.
plt.style.use(
    'seaborn-white' if 'seaborn-white' in plt.style.available
    else 'seaborn-v0_8-white'
)
# Download the three tutorial CSV files straight from the workshop repository:
# one pDR file and two sensor files (argon24 -> HW1, argon44 -> HW2).
_DATA_ROOT = (
    "https://raw.githubusercontent.com/ClarksonAirlab/2019_sensor_workshop/"
    "master/Python/Tutorial/Tutorial%20Data/"
)
pdr = pd.read_csv(_DATA_ROOT + "pDR_2_20190805_1.csv")
HW1 = pd.read_csv(_DATA_ROOT + "argon24.csv")
HW2 = pd.read_csv(_DATA_ROOT + "argon44.csv")

# First look at the pDR table: leading rows, then column dtypes / non-null counts.
pdr.head()
pdr.info()
# Build one timestamp per row by concatenating the "date" and "time" text
# columns and letting pandas parse the combined string.
pdr["DateTime"] = pd.to_datetime(pdr["date"] + pdr["time"])

# Force every column that is still object-typed to numbers; anything that does
# not parse becomes NaN. "DateTime" was converted above, so it is no longer
# object-typed and is left alone by this step.
cols = pdr.columns[pdr.dtypes == 'object']
pdr[cols] = pdr[cols].apply(pd.to_numeric, errors='coerce')

# Re-inspect the table after the conversions.
pdr.info()
pdr.head()
# Column names that follow the PM2.5 reading in both sensor files.
_hw_trailing_cols = ["pm10", "Temp", "RH"]

HW1.head()
# Relabel HW1's six columns. The PM2.5 column gets a sensor-specific name so
# that HW1 and HW2 readings can sit side by side in one table later.
HW1.columns = ["DateTime", "id", "pm2.5hw1", *_hw_trailing_cols]
HW1.head()
HW1.info()  # dtypes before the timestamp column is parsed
HW1["DateTime"] = pd.to_datetime(HW1["DateTime"])
HW1.info()  # dtypes after parsing

# Same relabelling and timestamp parsing for the second sensor file.
HW2.columns = ["DateTime", "id", "pm2.5hw2", *_hw_trailing_cols]
HW2["DateTime"] = pd.to_datetime(HW2["DateTime"])
# Common comparison window applied to all three tables.
_win_start, _win_end = '2019-08-05 13:26:00', '2019-08-06 07:50:00'

# Index every table by its timestamp while keeping "DateTime" available as a
# regular column (the plots below read it as a column).
pdr = pdr.set_index('DateTime', drop=False)
HW1 = HW1.set_index('DateTime', drop=False)
HW2 = HW2.set_index('DateTime', drop=False)

# Order the sensor rows by their "id" column.
# NOTE(review): the label slices below and the later as-of merge need rows in
# time order, so this presumably relies on "id" increasing with time - confirm.
HW1 = HW1.sort_values('id')
HW2 = HW2.sort_values('id')

# Keep only the rows whose timestamp falls inside the comparison window.
pdr = pdr[_win_start:_win_end]
HW1 = HW1[_win_start:_win_end]
HW2 = HW2[_win_start:_win_end]
# Overlay the PM2.5 time series of the pDR and both sensors on one 10x7 inch axes.
fig, ax1 = plt.subplots(figsize=(10, 7))
ax1.grid(False)
for _frame, _column, _colour, _label in (
    (pdr, 'pm2.5', 'k', '$PDR$'),
    (HW1, 'pm2.5hw1', 'r', '$HW1$'),
    (HW2, 'pm2.5hw2', 'g', '$HW2$'),
):
    ax1.plot(_frame['DateTime'], _frame[_column],
             color=_colour, linewidth=1, label=_label)
ax1.set_xlabel('DateTime', color='k', fontsize=16, labelpad=13)
ax1.set_ylabel('$PM2.5$ ($ug/m^3$)', color='k', fontsize=16, labelpad=13)
# Average the pDR readings into 5-second bins.
# "DateTime" exists both as the index and as a column of pdr. The column is
# dropped before averaging: if a datetime column survives mean(), the
# reset_index() call below cannot re-insert the index under the same name and
# raises "cannot insert DateTime, already exists". pdr itself is not modified.
pdrn = pdr.drop(['DateTime'], axis=1).resample('5s').mean()
# Turn the bin timestamps back into an ordinary "DateTime" column.
pdrn = pdrn.reset_index()

# Compare the original pDR trace (black) with the 5-second averages (red).
fig, ax1 = plt.subplots()
fig.set_size_inches(10, 7)
ax1.grid(False)
ax1.plot(pdr['DateTime'], pdr['pm2.5'], color='k', linewidth=1, label='$PDR$')
ax1.plot(pdrn['DateTime'], pdrn['pm2.5'], color='r', linewidth=1, label='$PDRN$')
# Replace the duplicated "DateTime" column with the one recreated from the
# index, so each sensor table has exactly one "DateTime" column to merge on.
HW1 = HW1.drop(['DateTime'], axis=1)
HW1 = HW1.reset_index()
HW2 = HW2.drop(['DateTime'], axis=1)
HW2 = HW2.reset_index()

# Attach to every 5-second pDR row the nearest-in-time reading of each sensor,
# provided it lies within 5 minutes; rows without such a reading get NaN.
# The gaps are deliberately left as real NaN: filling them with the string
# 'NaN' would turn the numeric columns into object dtype and break the
# plotting and regression steps that consume this table.
# NOTE(review): merge_asof requires both inputs sorted by "DateTime" - confirm
# the sensor tables are in time order at this point.
df = pd.merge_asof(
    pdrn, HW1[['DateTime', 'pm2.5hw1']],
    on='DateTime', tolerance=pd.Timedelta("5 minutes"), direction='nearest',
)
df = pd.merge_asof(
    df, HW2[['DateTime', 'pm2.5hw2']],
    on='DateTime', tolerance=pd.Timedelta("5 minutes"), direction='nearest',
)
df.head()
# Scatter of each sensor's PM2.5 (x) against the pDR PM2.5 (y) on a new figure.
plt.figure(figsize=(10, 8))
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
for _column, _colour, _label in (
    ('pm2.5hw1', 'k', 'hw1'),
    ('pm2.5hw2', 'r', 'hw2'),
):
    plt.scatter(df[_column], df['pm2.5'], color=_colour, alpha=0.9, label=_label)
plt.legend(loc='best', fontsize=17)
plt.xlabel('Honeywell pm2.5', fontsize=16, color='k', labelpad=13)
plt.ylabel('PDR pm2.5', fontsize=16, color='k', labelpad=13)
plt.title("Test with PDR", fontsize=16)
# Optional, currently disabled: plt.show() / plt.savefig('books_read.png', dpi=300)
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Copy the two series under short names for use in the regression formula
# (presumably because the dotted original names do not work as bare formula
# terms - confirm).
df['pdr'] = df['pm2.5']
df['hw1'] = df['pm2.5hw1']
df.head()

# Ordinary least squares fit of the pDR reading on the HW1 reading.
results = smf.ols('pdr ~ hw1', data=df).fit()
print(results.summary())

# Calibrated HW1 series: slope * hw1 + intercept, using the coefficients of
# the fit above. They were previously typed in by hand (2.6652 and 6.8350),
# which silently goes stale whenever the data or the time window changes.
df['chw1'] = df['hw1'] * results.params['hw1'] + results.params['Intercept']
# Time series of the pDR (black) and the calibrated HW1 values (red) on one
# 10x7 inch axes.
fig, ax1 = plt.subplots(figsize=(10, 7))
ax1.grid(False)
for _column, _colour, _label in (('pdr', 'k', 'PDR'), ('chw1', 'r', 'chw1')):
    ax1.plot(df['DateTime'], df[_column], color=_colour, linewidth=1, label=_label)
# Scatter of the calibrated HW1 values (x) against the pDR values (y) on a new figure.
plt.figure(figsize=(10, 8))
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.scatter(df['chw1'], df['pdr'], color='k', alpha=0.9, label='hw1')
plt.legend(loc='best', fontsize=17)
plt.xlabel('Honeywell pm2.5', fontsize=16, color='k', labelpad=13)
plt.ylabel('PDR pm2.5', fontsize=16, color='k', labelpad=13)
plt.title("Test with PDR", fontsize=16)
# Optional, currently disabled: plt.show() / plt.savefig('books_read.png', dpi=300)
import seaborn as sns
# Seaborn regression plot of the pDR values (y) against the calibrated HW1
# values (x); the returned axes object is kept in `ax`.
ax = sns.regplot(data=df, x="chw1", y="pdr")